{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "eXNLu_-nIrJI"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': [101, 2054, 2003, 2115, 2171, 1029, 102, 2026, 2171, 2003, 25353, 22144, 2378, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "\n",
    "#加载分词工具\n",
    "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n",
    "\n",
    "tokenizer('What is your name?', 'My name is Sylvain.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "zVvslsfMIrIh"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'id': '5725d643271a42140099d285', 'title': 'Hellenistic_period', 'context': \"Ptolemy's family ruled Egypt until the Roman conquest of 30 BC. All the male rulers of the dynasty took the name Ptolemy. Ptolemaic queens, some of whom were the sisters of their husbands, were usually called Cleopatra, Arsinoe or Berenice. The most famous member of the line was the last queen, Cleopatra VII, known for her role in the Roman political battles between Julius Caesar and Pompey, and later between Octavian and Mark Antony. Her suicide at the conquest by Rome marked the end of Ptolemaic rule in Egypt though Hellenistic culture continued to thrive in Egypt throughout the Roman and Byzantine periods until the Muslim conquest.\", 'question': \"Till what year did Ptolemy's family rule Egypt?\", 'answers': {'text': ['30 BC'], 'answer_start': [57]}}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['id', 'title', 'context', 'question', 'answers'],\n",
       "        num_rows: 10000\n",
       "    })\n",
       "    validation: Dataset({\n",
       "        features: ['id', 'title', 'context', 'question', 'answers'],\n",
       "        num_rows: 200\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import load_dataset, load_from_disk\n",
    "\n",
    "#加载数据集\n",
    "#dataset = load_dataset('squad')\n",
    "dataset = load_from_disk('datas/squad')\n",
    "\n",
    "#采样,数据量太大了跑不动\n",
    "dataset['train'] = dataset['train'].shuffle().select(range(10000))\n",
    "dataset['validation'] = dataset['validation'].shuffle().select(range(200))\n",
    "\n",
    "print(dataset['train'][0])\n",
    "\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input_ids 3 [[101, 6229, 2054, 2095, 2106, 23517, 1005, 1055, 2155, 3627, 5279, 1029, 102, 23517, 1005, 1055, 2155, 5451, 5279, 2127, 1996, 3142, 9187, 1997, 2382, 4647, 1012, 2035, 1996, 3287, 11117, 1997, 1996, 5321, 2165, 1996, 2171, 23517, 1012, 13866, 9890, 2863, 2594, 8603, 1010, 2070, 1997, 3183, 2020, 1996, 5208, 1997, 2037, 19089, 1010, 2020, 2788, 2170, 22003, 1010, 29393, 5740, 2063, 2030, 2022, 7389, 6610, 1012, 1996, 2087, 3297, 2266, 1997, 1996, 2240, 2001, 1996, 2197, 3035, 1010, 22003, 8890, 1010, 2124, 2005, 2014, 2535, 1999, 1996, 3142, 2576, 7465, 2090, 10396, 11604, 1998, 13433, 8737, 3240, 1010, 1998, 2101, 2090, 13323, 21654, 1998, 2928, 16262, 1012, 2014, 5920, 2012, 1996, 9187, 2011, 4199, 4417, 1996, 2203, 1997, 13866, 9890, 2863, 2594, 3627, 1999, 5279, 2295, 27464, 3226, 2506, 2000, 25220, 1999, 5279, 2802, 1996, 3142, 1998, 8734, 6993, 2127, 1996, 5152, 9187, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1999, 2116, 4731, 2054, 5069, 2071, 2022, 2170, 1996, 15918, 2391, 1997, 1996, 2451, 1029, 102, 1037, 9047, 1013, 1052, 29700, 2497, 1013, 1010, 2030, 2270, 2160, 2003, 1010, 2750, 2049, 2171, 1010, 1037, 2797, 2160, 1010, 2021, 2003, 2170, 1037, 2270, 2160, 2138, 2009, 2003, 7000, 2000, 5271, 6544, 2000, 1996, 2236, 2270, 1012, 2009, 2003, 1037, 5948, 5069, 1999, 3725, 1010, 3163, 1010, 2047, 3414, 1010, 2660, 1010, 2710, 1010, 5842, 1998, 2047, 2563, 1012, 1999, 2116, 3182, 1010, 2926, 1999, 4731, 1010, 1037, 9047, 2064, 2022, 1996, 15918, 2391, 1997, 1996, 2451, 1012, 1996, 7896, 1997, 5212, 27233, 7274, 6235, 1996, 9047, 2004, 1996, 2540, 1997, 2563, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2129, 2106, 1996, 2034, 13037, 2162, 2203, 1029, 102, 1999, 17405, 4647, 5170, 1010, 2007, 2010, 3239, 2006, 5665, 20379, 1010, 2719, 2019, 4707, 2007, 4199, 1005, 1055, 4099, 24181, 1997, 21959, 1010, 2029, 2419, 2000, 3142, 21277, 2007, 1996, 9353, 25293, 2319, 2223, 1010, 10588, 1998, 2566, 22864, 2819, 1012, 1996, 2034, 13037, 2162, 3631, 2041, 1999, 18164, 4647, 1010, 1998, 3092, 4297, 2239, 23633, 2135, 1999, 16327, 4647, 1012, 5170, 2506, 2000, 11897, 2162, 2114, 2566, 22864, 2239, 1998, 10588, 2005, 2491, 1997, 1996, 27198, 1006, 19627, 1011, 3263, 10705, 1007, 1998, 6439, 3142, 7670, 2005, 2512, 1011, 8830, 1999, 5483, 2011, 17657, 14832, 2050, 1012, 1999, 20003, 4647, 1010, 2076, 1996, 2117, 13037, 2162, 5170, 2001, 13079, 2135, 3249, 2012, 22330, 15460, 3401, 21890, 2721, 2063, 2011, 1996, 3142, 4013, 8663, 23722, 18828, 21864, 12273, 22638, 13109, 10631, 11483, 2271, 1998, 19382, 5280, 2439, 2035, 2049, 6500, 1999, 5483, 5372, 1012, 5483, 2001, 2085, 12246, 2716, 2046, 1996, 3142, 10336, 1997, 3747, 1010, 2295, 2009, 6025, 15087, 12645, 1012, 1996, 2203, 1997, 3424, 7446, 3593, 19382, 5280, 2234, 2043, 5170, 1058, 1005, 1055, 2365, 1010, 2566, 3366, 2271, 1010, 2001, 3249, 1998, 4110, 2011, 1996, 10900, 1999, 1996, 2353, 13037, 2162, 1006, 18225, 1516, 16923, 10705, 1007, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]\n",
      "\n",
      "attention_mask 3 [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]\n",
      "\n",
      "start_positions 3 [24, 104, 64]\n",
      "\n",
      "end_positions 3 [25, 105, 67]\n",
      "\n",
      "问题和文本\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CLS] till what year did ptolemy's family rule egypt? [SEP] ptolemy's family ruled egypt until the roman conquest of 30 bc. all the male rulers of the dynasty took the name ptolemy. ptolemaic queens, some of whom were the sisters of their husbands, were usually called cleopatra, arsinoe or berenice. the most famous member of the line was the last queen, cleopatra vii, known for her role in the roman political battles between julius caesar and pompey, and later between octavian and mark antony. her suicide at the conquest by rome marked the end of ptolemaic rule in egypt though hellenistic culture continued to thrive in egypt throughout the roman and byzantine periods until the muslim conquest. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "答案\n",
      "30\n",
      "原答案\n",
      "30 BC\n",
      "\n",
      "问题和文本\n",
      "[CLS] in many villages what establishment could be called the focal point of the community? [SEP] a pub / pʌb /, or public house is, despite its name, a private house, but is called a public house because it is licensed to sell alcohol to the general public. it is a drinking establishment in britain, ireland, new zealand, australia, canada, denmark and new england. in many places, especially in villages, a pub can be the focal point of the community. the writings of samuel pepys describe the pub as the heart of england. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "答案\n",
      "the\n",
      "原答案\n",
      "the pub\n",
      "\n",
      "问题和文本\n",
      "[CLS] how did the first macedonian war end? [SEP] in 215 bc philip, with his eye on illyria, formed an alliance with rome's enemy hannibal of carthage, which led to roman alliances with the achaean league, rhodes and pergamum. the first macedonian war broke out in 212 bc, and ended inconclusively in 205 bc. philip continued to wage war against pergamon and rhodes for control of the aegean ( 204 - 200 bce ) and ignored roman demands for non - intervention in greece by invading attica. in 198 bc, during the second macedonian war philip was decisively defeated at cynoscephalae by the roman proconsul titus quinctius flamininus and macedon lost all its territories in greece proper. greece was now thoroughly brought into the roman sphere of influence, though it retained nominal autonomy. the end of antigonid macedon came when philip v's son, perseus, was defeated and captured by the romans in the third macedonian war ( 171 – 168 bce ). [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "答案\n",
      "inconclusive\n",
      "原答案\n",
      "inconclusively\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#从官方教程里抄出来的函数,总之就是squad数据的处理函数,过程非常复杂,即使是官方的实现也是有问题的,我实在没本事写这个\n",
    "def prepare_train_features(examples):\n",
    "    # Some of the questions have lots of whitespace on the left, which is not useful and will make the\n",
    "    # truncation of the context fail (the tokenized question will take a lots of space). So we remove that\n",
    "    # left whitespace\n",
    "    examples[\"question\"] = [q.lstrip() for q in examples[\"question\"]]\n",
    "\n",
    "    # Tokenize our examples with truncation and padding, but keep the overflows using a stride. This results\n",
    "    # in one example possible giving several features when a context is long, each of those features having a\n",
    "    # context that overlaps a bit the context of the previous feature.\n",
    "    tokenized_examples = tokenizer(\n",
    "        examples['question'],\n",
    "        examples['context'],\n",
    "        truncation='only_second',\n",
    "        max_length=384,\n",
    "        stride=128,\n",
    "        return_overflowing_tokens=True,\n",
    "        return_offsets_mapping=True,\n",
    "        padding='max_length',\n",
    "    )\n",
    "\n",
    "    # Since one example might give us several features if it has a long context, we need a map from a feature to\n",
    "    # its corresponding example. This key gives us just that.\n",
    "    sample_mapping = tokenized_examples.pop(\"overflow_to_sample_mapping\")\n",
    "    # The offset mappings will give us a map from token to character position in the original context. This will\n",
    "    # help us compute the start_positions and end_positions.\n",
    "    offset_mapping = tokenized_examples.pop(\"offset_mapping\")\n",
    "\n",
    "    # Let's label those examples!\n",
    "    tokenized_examples[\"start_positions\"] = []\n",
    "    tokenized_examples[\"end_positions\"] = []\n",
    "\n",
    "    for i, offsets in enumerate(offset_mapping):\n",
    "        # We will label impossible answers with the index of the CLS token.\n",
    "        input_ids = tokenized_examples[\"input_ids\"][i]\n",
    "        cls_index = input_ids.index(tokenizer.cls_token_id)\n",
    "\n",
    "        # Grab the sequence corresponding to that example (to know what is the context and what is the question).\n",
    "        sequence_ids = tokenized_examples.sequence_ids(i)\n",
    "\n",
    "        # One example can give several spans, this is the index of the example containing this span of text.\n",
    "        sample_index = sample_mapping[i]\n",
    "        answers = examples[\"answers\"][sample_index]\n",
    "        # If no answers are given, set the cls_index as answer.\n",
    "        if len(answers[\"answer_start\"]) == 0:\n",
    "            tokenized_examples[\"start_positions\"].append(cls_index)\n",
    "            tokenized_examples[\"end_positions\"].append(cls_index)\n",
    "        else:\n",
    "            # Start/end character index of the answer in the text.\n",
    "            start_char = answers[\"answer_start\"][0]\n",
    "            end_char = start_char + len(answers[\"text\"][0])\n",
    "\n",
    "            # Start token index of the current span in the text.\n",
    "            token_start_index = 0\n",
    "            while sequence_ids[token_start_index] != 1:\n",
    "                token_start_index += 1\n",
    "\n",
    "            # End token index of the current span in the text.\n",
    "            token_end_index = len(input_ids) - 1\n",
    "            while sequence_ids[token_end_index] != 1:\n",
    "                token_end_index -= 1\n",
    "\n",
    "            # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).\n",
    "            if not (offsets[token_start_index][0] <= start_char\n",
    "                    and offsets[token_end_index][1] >= end_char):\n",
    "                tokenized_examples[\"start_positions\"].append(cls_index)\n",
    "                tokenized_examples[\"end_positions\"].append(cls_index)\n",
    "            else:\n",
    "                # Otherwise move the token_start_index and token_end_index to the two ends of the answer.\n",
    "                # Note: we could go after the last offset if the answer is the last word (edge case).\n",
    "                while token_start_index < len(offsets) and offsets[\n",
    "                        token_start_index][0] <= start_char:\n",
    "                    token_start_index += 1\n",
    "                tokenized_examples[\"start_positions\"].append(\n",
    "                    token_start_index - 1)\n",
    "                while offsets[token_end_index][1] >= end_char:\n",
    "                    token_end_index -= 1\n",
    "                tokenized_examples[\"end_positions\"].append(token_end_index + 1)\n",
    "\n",
    "    return tokenized_examples\n",
    "\n",
    "\n",
    "#调用squad数据预处理函数\n",
    "examples = prepare_train_features(dataset['train'][:3])\n",
    "\n",
    "#先看看处理后的结果\n",
    "for k, v in examples.items():\n",
    "    print(k, len(v), v)\n",
    "    print()\n",
    "\n",
    "#还原成文字查看,很显然,即使是huggingface的实现也是有问题的\n",
    "for i in range(len(examples['input_ids'])):\n",
    "    input_ids = examples['input_ids'][i]\n",
    "    start_positions = examples['start_positions'][i]\n",
    "    end_positions = examples['end_positions'][i]\n",
    "\n",
    "    print('问题和文本')\n",
    "    question_and_context = tokenizer.decode(input_ids)\n",
    "    print(question_and_context)\n",
    "\n",
    "    print('答案')\n",
    "    answer = tokenizer.decode(input_ids[start_positions:end_positions])\n",
    "    print(answer)\n",
    "\n",
    "    print('原答案')\n",
    "    original_answer = dataset['train'][i]['answers']['text'][0]\n",
    "    print(original_answer)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "38167b5c8e554343916799f4c7a2e1f2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/10 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5d2bba39f468439fae2a5ffb49d8dc6c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'input_ids': [101, 6229, 2054, 2095, 2106, 23517, 1005, 1055, 2155, 3627, 5279, 1029, 102, 23517, 1005, 1055, 2155, 5451, 5279, 2127, 1996, 3142, 9187, 1997, 2382, 4647, 1012, 2035, 1996, 3287, 11117, 1997, 1996, 5321, 2165, 1996, 2171, 23517, 1012, 13866, 9890, 2863, 2594, 8603, 1010, 2070, 1997, 3183, 2020, 1996, 5208, 1997, 2037, 19089, 1010, 2020, 2788, 2170, 22003, 1010, 29393, 5740, 2063, 2030, 2022, 7389, 6610, 1012, 1996, 2087, 3297, 2266, 1997, 1996, 2240, 2001, 1996, 2197, 3035, 1010, 22003, 8890, 1010, 2124, 2005, 2014, 2535, 1999, 1996, 3142, 2576, 7465, 2090, 10396, 11604, 1998, 13433, 8737, 3240, 1010, 1998, 2101, 2090, 13323, 21654, 1998, 2928, 16262, 1012, 2014, 5920, 2012, 1996, 9187, 2011, 4199, 4417, 1996, 2203, 1997, 13866, 9890, 2863, 2594, 3627, 1999, 5279, 2295, 27464, 3226, 2506, 2000, 25220, 1999, 5279, 2802, 1996, 3142, 1998, 8734, 6993, 2127, 1996, 5152, 9187, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'start_positions': 24, 'end_positions': 25}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['input_ids', 'attention_mask', 'start_positions', 'end_positions'],\n",
       "        num_rows: 10107\n",
       "    })\n",
       "    validation: Dataset({\n",
       "        features: ['input_ids', 'attention_mask', 'start_positions', 'end_positions'],\n",
       "        num_rows: 203\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#应用预处理函数\n",
    "dataset = dataset.map(\n",
    "    function=prepare_train_features,\n",
    "    batched=True,\n",
    "    remove_columns=['id', 'title', 'context', 'question', 'answers'])\n",
    "\n",
    "print(dataset['train'][0])\n",
    "\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1263,\n",
       " {'input_ids': tensor([[  101,  2073,  2106,  ...,     0,     0,     0],\n",
       "          [  101,  2054,  2001,  ...,     0,     0,     0],\n",
       "          [  101, 15854,  6238,  ...,     0,     0,     0],\n",
       "          ...,\n",
       "          [  101,  2040,  2001,  ...,     0,     0,     0],\n",
       "          [  101,  2054,  2724,  ...,     0,     0,     0],\n",
       "          [  101,  2040,  2550,  ...,     0,     0,     0]]),\n",
       "  'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],\n",
       "          [1, 1, 1,  ..., 0, 0, 0],\n",
       "          [1, 1, 1,  ..., 0, 0, 0],\n",
       "          ...,\n",
       "          [1, 1, 1,  ..., 0, 0, 0],\n",
       "          [1, 1, 1,  ..., 0, 0, 0],\n",
       "          [1, 1, 1,  ..., 0, 0, 0]]),\n",
       "  'start_positions': tensor([ 20,  55, 134,  47, 100, 153,  73,  41]),\n",
       "  'end_positions': tensor([ 23,  58, 136,  51, 100, 154,  75,  42])})"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "from transformers.data.data_collator import default_data_collator\n",
    "\n",
    "#数据加载器\n",
    "loader = torch.utils.data.DataLoader(\n",
    "    dataset=dataset['train'],\n",
    "    batch_size=8,\n",
    "    collate_fn=default_data_collator,\n",
    "    shuffle=True,\n",
    "    drop_last=True,\n",
    ")\n",
    "\n",
    "for i, data in enumerate(loader):\n",
    "    break\n",
    "\n",
    "len(loader), data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias']\n",
      "- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias']\n",
      "- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6636.4418\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(tensor(5.8415, grad_fn=<DivBackward0>),\n",
       " torch.Size([8, 384]),\n",
       " torch.Size([8, 384]))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import AutoModelForQuestionAnswering, DistilBertModel\n",
    "\n",
    "#加载模型\n",
    "#model = AutoModelForQuestionAnswering.from_pretrained('distilbert-base-uncased')\n",
    "\n",
    "\n",
    "#定义下游任务模型\n",
    "class Model(torch.nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.pretrained = DistilBertModel.from_pretrained(\n",
    "            'distilbert-base-uncased')\n",
    "\n",
    "        self.fc = torch.nn.Sequential(torch.nn.Dropout(0.1),\n",
    "                                      torch.nn.Linear(768, 2))\n",
    "        \n",
    "        #加载预训练模型的参数\n",
    "        parameters = AutoModelForQuestionAnswering.from_pretrained('distilbert-base-uncased')\n",
    "        self.fc[1].load_state_dict(parameters.qa_outputs.state_dict())\n",
    "\n",
    "    def forward(self, input_ids, attention_mask, start_positions,\n",
    "                end_positions):\n",
    "        #[b, lens] -> [b, lens, 768]\n",
    "        logits = self.pretrained(input_ids=input_ids,\n",
    "                                 attention_mask=attention_mask)\n",
    "        logits = logits.last_hidden_state\n",
    "\n",
    "        #[b, lens, 768] -> [b, lens, 2]\n",
    "        logits = self.fc(logits)\n",
    "\n",
    "        #[b, lens, 2] -> [b, lens, 1],[b, lens, 1]\n",
    "        start_logits, end_logits = logits.split(1, dim=2)\n",
    "\n",
    "        #[b, lens, 1] -> [b, lens]\n",
    "        start_logits = start_logits.squeeze(2)\n",
    "        end_logits = end_logits.squeeze(2)\n",
    "\n",
    "        #起点和终点都不能超出句子的长度\n",
    "        lens = start_logits.shape[1]\n",
    "        start_positions = start_positions.clamp(0, lens)\n",
    "        end_positions = end_positions.clamp(0, lens)\n",
    "\n",
    "        criterion = torch.nn.CrossEntropyLoss(ignore_index=lens)\n",
    "\n",
    "        start_loss = criterion(start_logits, start_positions)\n",
    "        end_loss = criterion(end_logits, end_positions)\n",
    "        loss = (start_loss + end_loss) / 2\n",
    "\n",
    "        return {\n",
    "            'loss': loss,\n",
    "            'start_logits': start_logits,\n",
    "            'end_logits': end_logits\n",
    "        }\n",
    "\n",
    "\n",
    "model = Model()\n",
    "\n",
    "#统计参数量\n",
    "print(sum(i.numel() for i in model.parameters()) / 10000)\n",
    "\n",
    "out = model(**data)\n",
    "\n",
    "out['loss'], out['start_logits'].shape, out['end_logits'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "10\n",
      "58.484375 61.0\n",
      "input_ids= [CLS] what shape are pyrenoids? [SEP] the chloroplasts of some hornworts and algae contain structures called pyrenoids. they are not found in higher plants. pyrenoids are roughly spherical and highly refractive bodies which are a site of starch accumulation in plants that contain them. they consist of a matrix opaque to electrons, surrounded by two hemispherical starch plates. the starch is accumulated as the pyrenoids mature. in algae with carbon concentrating mechanisms, the enzyme rubisco is found in the pyrenoids. starch can also accumulate around the pyrenoids when co2 is scarce. pyrenoids can divide to form new pyrenoids, or be produced \" de novo \". [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= what shape are pyrenoids? [SEP] the chloroplasts\n",
      "label_answer= roughly\n",
      "\n",
      "input_ids= [CLS] what year marked the lowest ratings for the drama lost? [SEP] the network began running into some trouble in the ratings by 2010. that year, the sixth and final season of lost became the drama's lowest - rated season since its debut in 2004. ratings for the once - instant hit ugly betty collapsed dramatically after it was moved to fridays at the start of its fourth season in the fall of 2009 ; an attempt to boost ratings by moving the dramedy to wednesdays failed, with its ultimate cancellation by the network eliciting negative reaction from the public, and particularly the show's fanbase. with the network's two former hit shows now out of the picture, the network's remaining top veteran shows desperate housewives and grey's anatomy, and another hit drama brothers & sisters, all ended the 2009 – 10 season recorded their lowest ratings ever. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= \n",
      "label_answer= \n",
      "\n",
      "input_ids= [CLS] what is the name of another algorithm useful for conveniently testing the primality of large numbers? [SEP] the property of being prime ( or not ) is called primality. a simple but slow method of verifying the primality of a given number n is known as trial division. it consists of testing whether n is a multiple of any integer between 2 and. algorithms much more efficient than trial division have been devised to test the primality of large numbers. these include the miller – rabin primality test, which is fast but has a small probability of error, and the aks primality test, which always produces the correct answer in polynomial time but is too slow to be practical. particularly fast methods are available for numbers of special forms, such as mersenne numbers. as of january 2016 [ update ], the largest known prime number has 22, 338, 618 decimal digits. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= \n",
      "label_answer= the aks primality\n",
      "\n",
      "input_ids= [CLS] when do members proceed to vote on whether they agree to the principles of the final bill? [SEP] stage 3 is the final stage of the bill and is considered at a meeting of the whole parliament. this stage comprises two parts : consideration of amendments to the bill as a general debate, and a final vote on the bill. opposition members can table \" wrecking amendments \" to the bill, designed to thwart further progress and take up parliamentary time, to cause the bill to fall without a final vote being taken. after a general debate on the final form of the bill, members proceed to vote at decision time on whether they agree to the general principles of the final bill. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= do members proceed to vote on whether they agree to the principles of the final bill? [SEP] stage 3 is the final stage of the bill and is considered at a meeting\n",
      "label_answer= decision\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#测试\n",
    "def test():\n",
    "    model.eval()\n",
    "\n",
    "    #数据加载器\n",
    "    loader_val = torch.utils.data.DataLoader(\n",
    "        dataset=dataset['validation'],\n",
    "        batch_size=16,\n",
    "        collate_fn=default_data_collator,\n",
    "        shuffle=True,\n",
    "        drop_last=True,\n",
    "    )\n",
    "\n",
    "    start_offset = 0\n",
    "    end_offset = 0\n",
    "    total = 0\n",
    "    for i, data in enumerate(loader_val):\n",
    "        #计算\n",
    "        with torch.no_grad():\n",
    "            out = model(**data)\n",
    "\n",
    "        start_offset += (out['start_logits'].argmax(dim=1) -\n",
    "                         data['start_positions']).abs().sum().item()\n",
    "\n",
    "        end_offset += (out['end_logits'].argmax(dim=1) -\n",
    "                       data['end_positions']).abs().sum().item()\n",
    "\n",
    "        total += 16\n",
    "\n",
    "        if i % 10 == 0:\n",
    "            print(i)\n",
    "\n",
    "        if i == 50:\n",
    "            break\n",
    "\n",
    "    print(start_offset / total, end_offset / total)\n",
    "\n",
    "    start_logits = out['start_logits'].argmax(dim=1)\n",
    "    end_logits = out['end_logits'].argmax(dim=1)\n",
    "    for i in range(4):\n",
    "        input_ids = data['input_ids'][i]\n",
    "\n",
    "        pred_answer = input_ids[start_logits[i]:end_logits[i]]\n",
    "\n",
    "        label_answer = input_ids[\n",
    "            data['start_positions'][i]:data['end_positions'][i]]\n",
    "\n",
    "        print('input_ids=', tokenizer.decode(input_ids))\n",
    "        print('pred_answer=', tokenizer.decode(pred_answer))\n",
    "        print('label_answer=', tokenizer.decode(label_answer))\n",
    "        print()\n",
    "\n",
    "\n",
    "test()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/anaconda3/envs/cpu/lib/python3.6/site-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
      "  FutureWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 5.984038352966309 1.9984164687252575e-05 79.125 57.25\n",
      "50 4.169722080230713 1.9192399049881235e-05 38.875 36.75\n",
      "100 4.359471321105957 1.84006334125099e-05 63.125 44.5\n",
      "150 3.296112537384033 1.760886777513856e-05 43.0 45.0\n",
      "200 3.181190252304077 1.6817102137767223e-05 40.75 57.875\n",
      "250 3.5123019218444824 1.6025336500395887e-05 60.25 64.75\n",
      "300 2.9549078941345215 1.5233570863024545e-05 17.0 40.375\n",
      "350 3.4506635665893555 1.4441805225653207e-05 7.625 13.5\n",
      "400 2.231827735900879 1.365003958828187e-05 8.125 7.5\n",
      "450 2.395646572113037 1.2858273950910532e-05 7.375 13.0\n",
      "500 2.952989101409912 1.2066508313539194e-05 30.5 34.375\n",
      "550 2.6806797981262207 1.1274742676167856e-05 23.625 22.25\n",
      "600 2.2227702140808105 1.0482977038796518e-05 20.375 33.875\n",
      "650 1.6418771743774414 9.69121140142518e-06 10.0 12.375\n",
      "700 1.9634692668914795 8.899445764053842e-06 12.75 13.25\n",
      "750 1.7344026565551758 8.107680126682502e-06 36.875 10.5\n",
      "800 2.1363699436187744 7.315914489311164e-06 5.0 5.0\n",
      "850 1.0939271450042725 6.5241488519398265e-06 5.25 1.375\n",
      "900 1.04538094997406 5.732383214568488e-06 1.5 8.0\n",
      "950 1.681868553161621 4.94061757719715e-06 8.125 15.375\n",
      "1000 1.3935863971710205 4.148851939825812e-06 9.0 8.375\n",
      "1050 2.076796293258667 3.3570863024544735e-06 5.25 29.75\n",
      "1100 1.2922166585922241 2.565320665083136e-06 3.0 3.125\n",
      "1150 2.257178783416748 1.7735550277117974e-06 4.875 11.5\n",
      "1200 1.7738124132156372 9.817893903404593e-07 9.75 21.0\n",
      "1250 1.4367058277130127 1.9002375296912116e-07 5.625 2.0\n"
     ]
    }
   ],
   "source": [
    "from transformers import AdamW\n",
    "from transformers.optimization import get_scheduler\n",
    "\n",
    "\n",
    "#训练\n",
    "def train():\n",
    "    optimizer = AdamW(model.parameters(), lr=2e-5)\n",
    "    scheduler = get_scheduler(name='linear',\n",
    "                              num_warmup_steps=0,\n",
    "                              num_training_steps=len(loader),\n",
    "                              optimizer=optimizer)\n",
    "\n",
    "    model.train()\n",
    "    for i, data in enumerate(loader):\n",
    "        out = model(**data)\n",
    "        loss = out['loss']\n",
    "\n",
    "        loss.backward()\n",
    "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
    "\n",
    "        optimizer.step()\n",
    "        scheduler.step()\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        model.zero_grad()\n",
    "\n",
    "        if i % 50 == 0:\n",
    "            lr = optimizer.state_dict()['param_groups'][0]['lr']\n",
    "\n",
    "            start_offset = (out['start_logits'].argmax(dim=1) -\n",
    "                            data['start_positions']).abs().sum().item() / 8\n",
    "\n",
    "            end_offset = (out['end_logits'].argmax(dim=1) -\n",
    "                          data['end_positions']).abs().sum().item() / 8\n",
    "\n",
    "            print(i, loss.item(), lr, start_offset, end_offset)\n",
    "\n",
    "    torch.save(model, 'models/3.阅读理解.model')\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "10\n",
      "9.78125 13.145833333333334\n",
      "input_ids= [CLS] what are the top 4 - 5 % graduating students honored with? [SEP] harvard's academic programs operate on a semester calendar beginning in early september and ending in mid - may. undergraduates typically take four half - courses per term and must maintain a four - course rate average to be considered full - time. in many concentrations, students can elect to pursue a basic program or an honors - eligible program requiring a senior thesis and / or advanced course work. students graduating in the top 4 – 5 % of the class are awarded degrees summa cum laude, students in the next 15 % of the class are awarded magna cum laude, and the next 30 % of the class are awarded cum laude. harvard has chapters of academic honor societies such as phi beta kappa and various committees and departments also award several hundred named prizes annually. harvard, along with other universities, has been accused of grade inflation, although there is evidence that the quality of the student body and its motivation have also increased. harvard college reduced the number of students who receive latin honors from 90 % in 2004 to 60 % in 2005. moreover, the honors of \" john harvard scholar \" and \" harvard college scholar \" will now be given only to the top 5 percent and the next 5 percent of each class. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= degrees summa cum\n",
      "label_answer= summa cum\n",
      "\n",
      "input_ids= [CLS] what other cbs talk show played, after the main one that began immediately after super bowl 50? [SEP] as opposed to broadcasts of primetime series, cbs broadcast special episodes of its late night talk shows as its lead - out programs for super bowl 50, beginning with a special episode of the late show with stephen colbert following the game. following a break for late local programming, cbs also aired a special episode of the late late show with james corden. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= late\n",
      "label_answer= the late late show with james cord\n",
      "\n",
      "input_ids= [CLS] how much energy loss did he expect would occur? [SEP] in 1935, in an annual birthday celebration interview, tesla announced a method of transmitting mechanical energy with minimal loss over any terrestrial distance, a related new means of communication, and a method of accurately determining the location of underground mineral deposits. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= minimal loss over any terrestrial\n",
      "label_answer= \n",
      "\n",
      "input_ids= [CLS] what is negatively correlated to the duration of economic growth? [SEP] according to international monetary fund economists, inequality in wealth and income is negatively correlated with the duration of economic growth spells ( not the rate of growth ). high levels of inequality prevent not just economic prosperity, but also the quality of a country's institutions and high levels of education. according to imf staff economists, \" if the income share of the top 20 percent ( the rich ) increases, then gdp growth actually declines over the medium term, suggesting that the benefits don't trickle down. in contrast, an increase in the income share of the bottom 20 percent ( the poor ) is associated with higher gdp growth. the poor and the middle class matter the most for growth via a number of interrelated economic, social, and political channels. \" [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]\n",
      "pred_answer= inequality in wealth and\n",
      "label_answer= inequality in wealth and\n",
      "\n"
     ]
    }
   ],
   "source": [
    "model = torch.load('models/3.阅读理解.model')\n",
    "test()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Question Answering on SQUAD",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
